Introduction

We already know how to extract useful information from data frames. Various statistics tell us a lot about the data. Nevertheless, values of mean, quantiles and standard deviations are inconvenient for understanding the whole picture.

We get the most information through our eyes, therefore the skill of presenting data visually is one of the most powerful. By creating simple visualisations, you can make initial hypotheses and understand possible relationships between variables.

Libraries

# install.packages('ggplot2')
library(ggplot2)
library(dplyr)
library(tidyr)

Working data

Today we will work with the iris dataset. It is already included in R by default.

This famous (Fisher’s or Anderson’s) iris data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica.

# data(iris)
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

Adding new factor

# Copy iris into a new data frame that we can extend with extra columns
iris_df = iris 
# cut() turns the numeric Sepal.Length into a factor using right-closed
# intervals: (4, 6] -> 'short', (6, 8] -> 'long'
# (so a flower with Sepal.Length exactly 6.0 is labelled 'short')
iris_df$Sepal.Length.Category = cut(x=iris_df$Sepal.Length,
                                    breaks = c(4,6,8),
                                    labels = c('short', 'long'))
iris_df
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1            5.1         3.5          1.4         0.2     setosa
## 2            4.9         3.0          1.4         0.2     setosa
## 3            4.7         3.2          1.3         0.2     setosa
## 4            4.6         3.1          1.5         0.2     setosa
## 5            5.0         3.6          1.4         0.2     setosa
## 6            5.4         3.9          1.7         0.4     setosa
## 7            4.6         3.4          1.4         0.3     setosa
## 8            5.0         3.4          1.5         0.2     setosa
## 9            4.4         2.9          1.4         0.2     setosa
## 10           4.9         3.1          1.5         0.1     setosa
## 11           5.4         3.7          1.5         0.2     setosa
## 12           4.8         3.4          1.6         0.2     setosa
## 13           4.8         3.0          1.4         0.1     setosa
## 14           4.3         3.0          1.1         0.1     setosa
## 15           5.8         4.0          1.2         0.2     setosa
## 16           5.7         4.4          1.5         0.4     setosa
## 17           5.4         3.9          1.3         0.4     setosa
## 18           5.1         3.5          1.4         0.3     setosa
## 19           5.7         3.8          1.7         0.3     setosa
## 20           5.1         3.8          1.5         0.3     setosa
## 21           5.4         3.4          1.7         0.2     setosa
## 22           5.1         3.7          1.5         0.4     setosa
## 23           4.6         3.6          1.0         0.2     setosa
## 24           5.1         3.3          1.7         0.5     setosa
## 25           4.8         3.4          1.9         0.2     setosa
## 26           5.0         3.0          1.6         0.2     setosa
## 27           5.0         3.4          1.6         0.4     setosa
## 28           5.2         3.5          1.5         0.2     setosa
## 29           5.2         3.4          1.4         0.2     setosa
## 30           4.7         3.2          1.6         0.2     setosa
## 31           4.8         3.1          1.6         0.2     setosa
## 32           5.4         3.4          1.5         0.4     setosa
## 33           5.2         4.1          1.5         0.1     setosa
## 34           5.5         4.2          1.4         0.2     setosa
## 35           4.9         3.1          1.5         0.2     setosa
## 36           5.0         3.2          1.2         0.2     setosa
## 37           5.5         3.5          1.3         0.2     setosa
## 38           4.9         3.6          1.4         0.1     setosa
## 39           4.4         3.0          1.3         0.2     setosa
## 40           5.1         3.4          1.5         0.2     setosa
## 41           5.0         3.5          1.3         0.3     setosa
## 42           4.5         2.3          1.3         0.3     setosa
## 43           4.4         3.2          1.3         0.2     setosa
## 44           5.0         3.5          1.6         0.6     setosa
## 45           5.1         3.8          1.9         0.4     setosa
## 46           4.8         3.0          1.4         0.3     setosa
## 47           5.1         3.8          1.6         0.2     setosa
## 48           4.6         3.2          1.4         0.2     setosa
## 49           5.3         3.7          1.5         0.2     setosa
## 50           5.0         3.3          1.4         0.2     setosa
## 51           7.0         3.2          4.7         1.4 versicolor
## 52           6.4         3.2          4.5         1.5 versicolor
## 53           6.9         3.1          4.9         1.5 versicolor
## 54           5.5         2.3          4.0         1.3 versicolor
## 55           6.5         2.8          4.6         1.5 versicolor
## 56           5.7         2.8          4.5         1.3 versicolor
## 57           6.3         3.3          4.7         1.6 versicolor
## 58           4.9         2.4          3.3         1.0 versicolor
## 59           6.6         2.9          4.6         1.3 versicolor
## 60           5.2         2.7          3.9         1.4 versicolor
## 61           5.0         2.0          3.5         1.0 versicolor
## 62           5.9         3.0          4.2         1.5 versicolor
## 63           6.0         2.2          4.0         1.0 versicolor
## 64           6.1         2.9          4.7         1.4 versicolor
## 65           5.6         2.9          3.6         1.3 versicolor
## 66           6.7         3.1          4.4         1.4 versicolor
## 67           5.6         3.0          4.5         1.5 versicolor
## 68           5.8         2.7          4.1         1.0 versicolor
## 69           6.2         2.2          4.5         1.5 versicolor
## 70           5.6         2.5          3.9         1.1 versicolor
## 71           5.9         3.2          4.8         1.8 versicolor
## 72           6.1         2.8          4.0         1.3 versicolor
## 73           6.3         2.5          4.9         1.5 versicolor
## 74           6.1         2.8          4.7         1.2 versicolor
## 75           6.4         2.9          4.3         1.3 versicolor
## 76           6.6         3.0          4.4         1.4 versicolor
## 77           6.8         2.8          4.8         1.4 versicolor
## 78           6.7         3.0          5.0         1.7 versicolor
## 79           6.0         2.9          4.5         1.5 versicolor
## 80           5.7         2.6          3.5         1.0 versicolor
## 81           5.5         2.4          3.8         1.1 versicolor
## 82           5.5         2.4          3.7         1.0 versicolor
## 83           5.8         2.7          3.9         1.2 versicolor
## 84           6.0         2.7          5.1         1.6 versicolor
## 85           5.4         3.0          4.5         1.5 versicolor
## 86           6.0         3.4          4.5         1.6 versicolor
## 87           6.7         3.1          4.7         1.5 versicolor
## 88           6.3         2.3          4.4         1.3 versicolor
## 89           5.6         3.0          4.1         1.3 versicolor
## 90           5.5         2.5          4.0         1.3 versicolor
## 91           5.5         2.6          4.4         1.2 versicolor
## 92           6.1         3.0          4.6         1.4 versicolor
## 93           5.8         2.6          4.0         1.2 versicolor
## 94           5.0         2.3          3.3         1.0 versicolor
## 95           5.6         2.7          4.2         1.3 versicolor
## 96           5.7         3.0          4.2         1.2 versicolor
## 97           5.7         2.9          4.2         1.3 versicolor
## 98           6.2         2.9          4.3         1.3 versicolor
## 99           5.1         2.5          3.0         1.1 versicolor
## 100          5.7         2.8          4.1         1.3 versicolor
## 101          6.3         3.3          6.0         2.5  virginica
## 102          5.8         2.7          5.1         1.9  virginica
## 103          7.1         3.0          5.9         2.1  virginica
## 104          6.3         2.9          5.6         1.8  virginica
## 105          6.5         3.0          5.8         2.2  virginica
## 106          7.6         3.0          6.6         2.1  virginica
## 107          4.9         2.5          4.5         1.7  virginica
## 108          7.3         2.9          6.3         1.8  virginica
## 109          6.7         2.5          5.8         1.8  virginica
## 110          7.2         3.6          6.1         2.5  virginica
## 111          6.5         3.2          5.1         2.0  virginica
## 112          6.4         2.7          5.3         1.9  virginica
## 113          6.8         3.0          5.5         2.1  virginica
## 114          5.7         2.5          5.0         2.0  virginica
## 115          5.8         2.8          5.1         2.4  virginica
## 116          6.4         3.2          5.3         2.3  virginica
## 117          6.5         3.0          5.5         1.8  virginica
## 118          7.7         3.8          6.7         2.2  virginica
## 119          7.7         2.6          6.9         2.3  virginica
## 120          6.0         2.2          5.0         1.5  virginica
## 121          6.9         3.2          5.7         2.3  virginica
## 122          5.6         2.8          4.9         2.0  virginica
## 123          7.7         2.8          6.7         2.0  virginica
## 124          6.3         2.7          4.9         1.8  virginica
## 125          6.7         3.3          5.7         2.1  virginica
## 126          7.2         3.2          6.0         1.8  virginica
## 127          6.2         2.8          4.8         1.8  virginica
## 128          6.1         3.0          4.9         1.8  virginica
## 129          6.4         2.8          5.6         2.1  virginica
## 130          7.2         3.0          5.8         1.6  virginica
## 131          7.4         2.8          6.1         1.9  virginica
## 132          7.9         3.8          6.4         2.0  virginica
## 133          6.4         2.8          5.6         2.2  virginica
## 134          6.3         2.8          5.1         1.5  virginica
## 135          6.1         2.6          5.6         1.4  virginica
## 136          7.7         3.0          6.1         2.3  virginica
## 137          6.3         3.4          5.6         2.4  virginica
## 138          6.4         3.1          5.5         1.8  virginica
## 139          6.0         3.0          4.8         1.8  virginica
## 140          6.9         3.1          5.4         2.1  virginica
## 141          6.7         3.1          5.6         2.4  virginica
## 142          6.9         3.1          5.1         2.3  virginica
## 143          5.8         2.7          5.1         1.9  virginica
## 144          6.8         3.2          5.9         2.3  virginica
## 145          6.7         3.3          5.7         2.5  virginica
## 146          6.7         3.0          5.2         2.3  virginica
## 147          6.3         2.5          5.0         1.9  virginica
## 148          6.5         3.0          5.2         2.0  virginica
## 149          6.2         3.4          5.4         2.3  virginica
## 150          5.9         3.0          5.1         1.8  virginica
##     Sepal.Length.Category
## 1                   short
## 2                   short
## 3                   short
## 4                   short
## 5                   short
## 6                   short
## 7                   short
## 8                   short
## 9                   short
## 10                  short
## 11                  short
## 12                  short
## 13                  short
## 14                  short
## 15                  short
## 16                  short
## 17                  short
## 18                  short
## 19                  short
## 20                  short
## 21                  short
## 22                  short
## 23                  short
## 24                  short
## 25                  short
## 26                  short
## 27                  short
## 28                  short
## 29                  short
## 30                  short
## 31                  short
## 32                  short
## 33                  short
## 34                  short
## 35                  short
## 36                  short
## 37                  short
## 38                  short
## 39                  short
## 40                  short
## 41                  short
## 42                  short
## 43                  short
## 44                  short
## 45                  short
## 46                  short
## 47                  short
## 48                  short
## 49                  short
## 50                  short
## 51                   long
## 52                   long
## 53                   long
## 54                  short
## 55                   long
## 56                  short
## 57                   long
## 58                  short
## 59                   long
## 60                  short
## 61                  short
## 62                  short
## 63                  short
## 64                   long
## 65                  short
## 66                   long
## 67                  short
## 68                  short
## 69                   long
## 70                  short
## 71                  short
## 72                   long
## 73                   long
## 74                   long
## 75                   long
## 76                   long
## 77                   long
## 78                   long
## 79                  short
## 80                  short
## 81                  short
## 82                  short
## 83                  short
## 84                  short
## 85                  short
## 86                  short
## 87                   long
## 88                   long
## 89                  short
## 90                  short
## 91                  short
## 92                   long
## 93                  short
## 94                  short
## 95                  short
## 96                  short
## 97                  short
## 98                   long
## 99                  short
## 100                 short
## 101                  long
## 102                 short
## 103                  long
## 104                  long
## 105                  long
## 106                  long
## 107                 short
## 108                  long
## 109                  long
## 110                  long
## 111                  long
## 112                  long
## 113                  long
## 114                 short
## 115                 short
## 116                  long
## 117                  long
## 118                  long
## 119                  long
## 120                 short
## 121                  long
## 122                 short
## 123                  long
## 124                  long
## 125                  long
## 126                  long
## 127                  long
## 128                  long
## 129                  long
## 130                  long
## 131                  long
## 132                  long
## 133                  long
## 134                  long
## 135                  long
## 136                  long
## 137                  long
## 138                  long
## 139                 short
## 140                  long
## 141                  long
## 142                  long
## 143                 short
## 144                  long
## 145                  long
## 146                  long
## 147                  long
## 148                  long
## 149                  long
## 150                 short

Basic Visualization

plot()

The simplest graph is a set of points, each having x and y coordinates

x = 1:10
y = seq(2,20,2) ^ 2
x
##  [1]  1  2  3  4  5  6  7  8  9 10
y
##  [1]   4  16  36  64 100 144 196 256 324 400

We consider that the x and y coordinates at the same positions in each of the vectors correspond to one particular point. So we have points (1,4), (2,16), (3,36), etc.

  • seq(from_value, to_value, by_value) - returns a vector with values from from_value to to_value with a step of by_value

Scatter plot

plot(x, y)

Sepal.Length vs Sepal.Width

plot(iris_df$Sepal.Length, iris_df$Sepal.Width)

We can make this graph prettier

# Scatter plot of sepal length against sepal width with custom styling
plot(iris_df$Sepal.Length, iris_df$Sepal.Width,
     main = "Sepal.Length vs. Sepal.Width", # the title
     xlab = "Sepal.Length", # label of the X-axis
     ylab = "Sepal.Width", # label of the Y-axis
     col = "blue", # color of the points
     pch = 19, # point symbol - 19 corresponds to solid (filled) circles
     cex = 1 # relative size of the points (no trailing comma: it would pass an empty argument)
     )

Adding lines

By default plot() shows scatter plot, but we can change this behavior

plot(x, y,type = 'l') 

plot(x, y,type = 'b', lwd = 3) # lwd = linewidth

plot(x, y, type = 'o')

Terrible plot

Line graphs should sometimes be avoided, because lines connect points in the order of their position in the vectors

x = c(10,5,9,6,8,7,2,1,4,3)
y = c(1:4, 6:8,5,10,9)

plot(x, y, type = "b",  pch=19)

After sorting:

df = data.frame(x, y) %>% arrange(x)
plot(df$x, df$y,type = "b", pch=19)

# Advanced

# indexes_to_sort = order(x)
# x_sorted = x[indexes_to_sort]
# y_sorted = y[indexes_to_sort]
# plot(x_sorted, y_sorted, type = "b", pch=19)

NB! We can’t just sort one of the vectors, because we must preserve the correspondence between the x and y coordinates in the two vectors.

All plot() style parameters

Bar Plots

heights = c(Roman = 190, Ann = 172, Charlie = 121) # named vector
barplot(heights)

We can make this graph prettier

# Horizontal bar plot of the named vector `heights`; because horiz = TRUE,
# the values run along the x-axis and the names along the y-axis
barplot(heights, 
        main = "Heights of people", # the title 
        xlab = "Height", # label of the X-axis 
        ylab = "Name", # label of the Y-axis 
        col = "lightblue", # fill color of the bars
        border = "blue", # color of the bar borders
        horiz = TRUE, # make the barplot horizontal
        xlim = c(0,200) # limits of the values shown on the x-axis
        )

Number of flowers per species in iris_df

number_of_species = table(iris_df$Species)
barplot(number_of_species,
        col = 'violet')

Now we see that our data “is balanced”

Histograms

Distribution of Sepal.Length

# Histogram of Sepal.Length: only one variable is needed, the y-axis
# (frequency) is computed by hist() itself
hist(iris_df$Sepal.Length, # only x-axis! 
     main = "Sepal.Length distribution", 
     xlab = "Sepal.length", 
     col = "lightgreen", 
     breaks = 8 # suggested number of bins; hist() treats a single number as a hint and rounds the breakpoints to "pretty" values
     )

Each bar represents the number of irises whose Sepal.Length falls into the corresponding range.
For example, the first bar has a height of 5, which means that there are 5 flowers with Sepal.Length between 4 and 4.5.

We can make bars two times thinner

hist(iris_df$Sepal.Length,
     main = "Sepal.Length distribution", 
     xlab = "Sepal.length", 
     col = "lightgreen", 
     breaks = 16) # <---- changed

Boxplots

Boxplots are very informative charts. They show information similar to a histogram, and more (for example the median, the quartiles and outliers).

# Horizontal boxplot of Sepal.Length over all flowers
boxplot(iris_df$Sepal.Length, 
        # main = "Sepal.Length", 
        ylab = "",
        xlab = "Sepal.Length", 
        col = "darkviolet",
        horizontal = TRUE) # TRUE spelled out: T is an ordinary variable that can be reassigned

Grouped boxplots

# One vertical boxplot of Sepal.Length per species.
# The formula `value ~ group` is evaluated inside `data`, so the column
# names do not need the `iris_df$` prefix.
boxplot(Sepal.Length ~ Species, data = iris_df,
        main = "Sepal.Length",
        xlab = "Species",
        ylab = "Sepal.Length", 
        col = "darkviolet",
        horizontal = FALSE) # FALSE spelled out: F is an ordinary variable that can be reassigned

A boxplot works well for unimodal, roughly normal distributions; it cannot show two peaks

All basic R functions

ggplot

ggplot2 is the most popular package for creating charts.

Philosophy

  • ggplot is based on 3 things: data, aesthetics and geoms (geometries)

  • Inside the geoms there are aesthetics.

  • Inside the aesthetics we put the variables from the data that we want to see in the plot. These will be our axes.

  • An axis is not just an x or y coordinate - any aesthetic, such as fill, color, size etc., can also be a (pseudo)axis.

  • Each component in the graphic is added layer by layer

Intro

ggplot(data=iris_df, # data
       mapping = aes(x=Sepal.Length, y=Sepal.Width)) + #aesthetics (axes)
  geom_point()  # geom

Obligatory components to create chart:

  1. ggplot(data=iris_df) - data: data.frame, tibble
  2. ggplot(..., mapping=aes(x=Sepal.Length, y=Sepal.Width)) - aesthetics, which are turned into the x and y axes.
  3. + geom_point() - geom, at least one.

Adding new aesthetics (“axes”)

Color and size

ggplot(data=iris_df,
       mapping = aes(x=Sepal.Length, y=Sepal.Width,
                      color=Species, size = Petal.Width,
                     shape=Sepal.Length.Category)) +
  geom_point() 

Other aesthetics

  • shape - shape of points

  • fill - filling color

  • stroke - stroke thickness

  • alpha - transparency

Geoms

Geometries define the types of graphs in the diagram

Geoms have specific variables

iris_df %>% 
  ggplot(aes(x=Sepal.Width)) +   # <--- here
  geom_histogram(bins = 20, fill='lightblue', col='black')

Aesthetics can also be set up within the geoms

iris_df %>% 
  ggplot() + 
  geom_histogram(aes(x=Sepal.Width),
                 bins = 20, fill='lightblue', col='black')  # <--- here

  • aesthetics inside ggplot() are set for all geoms

  • aesthetics inside geom_..() functions are set only for this geom

Adding several geoms

iris_df %>% 
  ggplot(aes(x=Species, y=Sepal.Length)) + 
  geom_boxplot(aes(fill=Species)) +
  geom_jitter(width=0.1)

Differences between variables and aesthetics

As you have seen, fill, color, size, shape, stroke and alpha can also appear outside of aes(). There they are set to a specific fixed value instead of being mapped to a variable.

Inside the aes()

iris_df %>% 
  ggplot(aes(x=Sepal.Length, y=Sepal.Width)) +
  geom_point(aes(color=Species)) # <--- here

Outside the aes()

ggplot(data=iris_df,
       mapping = aes(x=Sepal.Length, y=Sepal.Width)) +
  geom_point(color='blue')  # <--- here

Storing plots in variables

p = ggplot(data=iris_df,
       mapping = aes(x=Sepal.Length, y=Sepal.Width, color=Species)) +
  geom_point()
p

Themes

Built-in themes

p + theme_bw()

p + theme_classic()

p + theme_void()

Custom themes (Advanced)

p = p + theme(axis.text = element_text(size = 15),
          axis.title = element_text(size = 20),
          panel.background = element_rect(fill = 'white', color='black'))
p

Labels

p + labs(x='Sepal length', y='Sepal width', 
         title = 'Scatter plot', subtitle = 'Subtitle',
         caption = 'It is the great plot', tag = 'A')

Simple functions for labeling axes: + xlab() and + ylab()

Let’s draw!

Scatter plot

iris_df %>% ggplot(aes(x=Petal.Length, y=Petal.Width)) + 
  geom_point() + theme_classic()

Histogram

iris_df %>% ggplot(aes(x=Petal.Length)) + 
  geom_histogram(color='blue', fill='lightblue') +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Density plot

iris_df %>% ggplot(aes(x=Petal.Width, fill=Species)) + 
  geom_density(alpha=0.8) + theme_classic()

Boxplot

iris_df %>% ggplot(aes(x=Petal.Width, y=Species, fill=Species)) + 
  geom_boxplot() + theme_classic()

Barplot

sepal_len_stats = iris_df %>% group_by(Species) %>% 
  summarise(mean_sepal_len = mean(Sepal.Length), 
            sd_sepal_length = sd(Sepal.Length))

sepal_len_stats
## # A tibble: 3 × 3
##   Species    mean_sepal_len sd_sepal_length
##   <fct>               <dbl>           <dbl>
## 1 setosa               5.01           0.352
## 2 versicolor           5.94           0.516
## 3 virginica            6.59           0.636

geom_col - if heights of bars are known (continuous data)

p_col = sepal_len_stats %>% 
  ggplot(aes(x=Species, y=mean_sepal_len, fill=Species)) + 
  geom_col() + theme_classic()

p_col

Adding geom_errorbar

p_col + geom_errorbar(aes(ymin=mean_sepal_len-sd_sepal_length,
                          ymax=mean_sepal_len+sd_sepal_length),
                      width=0.3)

geom_bar - if heights are unknown (counting categorical data)

iris_df %>% ggplot(aes(x=Sepal.Length.Category, 
                       fill=Sepal.Length.Category)) + geom_bar()

position="stack" (by default)

iris_df %>% ggplot(aes(fill=Species, x=Sepal.Length.Category)) + geom_bar()

position="fill" - scaling from 0 to 1

iris_df %>% ggplot(aes(fill=Species, x=Sepal.Length.Category)) + 
  geom_bar(position = 'fill')

position="dodge"

iris_df %>% ggplot(aes(fill=Species, x=Sepal.Length.Category)) + 
  geom_bar(position = 'dodge')

ggpubr (Advanced)

# install.packages('ggpubr')
library(ggpubr)

Barplot

iris_df %>%  ggbarplot(x="Species", y="Sepal.Width",
                       fill='Species',
                       add = "mean_sd" # calculate mean and sd
                       )

Boxplot

Statistics inside!

p_pubr = iris_df %>%  ggboxplot(x='Species', y='Sepal.Length', col='Species')

comparisons <- list( c("setosa", "versicolor"), 
                     c("setosa", "virginica"), 
                     c("virginica", "versicolor") )

p_pubr + stat_compare_means(comparisons = comparisons, 
                            label = "p.signif")+ 
  stat_compare_means(label.y = 10)

P.S

scale_color_manual()

scale_fill_manual()

facet_grid()

# Split the stored scatter plot into one panel per species (panels laid out
# in columns); facet_grid() without faceting variables would show nothing new
p + facet_grid(. ~ Species)